# Category metadata for the QuickDraw files; the parsed columns are `name`
# plus one logical flag per category group (animal, food, fruit, bug, ...).
metafile <- './quickdraw classification.csv'
# skip = 1 discards the first line of the file before the header is read.
meta <- read_csv(metafile, skip = 1)
Parsed with column specification:
cols(
name = col_character(),
animal = col_logical(),
food = col_logical(),
fruit = col_logical(),
veggie = col_logical(),
building = col_logical(),
vechicle = col_logical(),
bug = col_logical(),
bird = col_logical(),
plant = col_logical()
)
# Split each file name at its first literal "." (coll() makes the dot a fixed
# string, not a regex) and keep the part before it as the category basename.
name_parts <- str_split_fixed(meta[["name"]], pattern = coll("."), n = 2)
meta[["basename"]] <- name_parts[, 1]

# Metadata rows flagged as bugs.
bugs <- filter(meta, bug == TRUE)
Warning message:
In scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
EOF within quoted string
# Raw per-drawing stats for the "dog" category, before any cleaning.
filename <- 'data/dog.stats.csv'
dogs_org <- read_csv(filename)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
# Raw per-drawing stats for the "cat" category; `filename` is reused from the
# dog read above.
filename <- 'data/cat.stats.csv'
cats_org <- read_csv(filename)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
prepare_data <- function (data) {
  # Clean one stats table.
  #
  # data: a data frame with drawing_time, drawing_time_draw,
  #       drawing_time_pause, stroke_in_order, drawing_time_min and
  #       drawing_time_max columns.
  # Returns `data` with three extra *_seconds columns (each timing column
  # divided by 1000 -- presumably milliseconds to seconds, confirm against the
  # stats generator) and with suspicious rows removed.
  with_seconds <- mutate(
    data,
    drawing_time_seconds = drawing_time / 1000,
    drawing_time_draw_seconds = drawing_time_draw / 1000,
    drawing_time_pause_seconds = drawing_time_pause / 1000
  )

  filter(
    with_seconds,
    # a few drawings have time values that are not incremental
    stroke_in_order == 0,
    # a few drawings have weird times
    drawing_time_min >= 0,
    drawing_time_max > 0,
    drawing_time_draw > 100
  )
}
# Clean both raw tables with prepare_data().
cats <- prepare_data(cats_org)
dogs <- prepare_data(dogs_org)

# Share of recognized dog drawings per 0.1 s bin of total drawing time.
# Rows at 25 s or more are dropped up front; the x scale then limits the axis
# to 0-20. A filter(stroke_count < 4) step was tried here and left disabled.
ggplot(
  data = filter(dogs, recognized == 'True', drawing_time_seconds < 25),
  mapping = aes(x = drawing_time_seconds, y = (..count..)/sum(..count..))
) +
  geom_histogram(fill = "#607d8b", binwidth = 0.1) +
  scale_x_continuous(limits = c(0, 20)) +
  scale_y_continuous(labels = scales::percent) +
  theme_joy() +
  labs(title = "Dog Total Drawing Time", x = "", y = "")

# No plot argument is given, so ggsave() writes the most recent plot.
ggsave("dog_total.png", height = 5, width = 6)
# Reconstructed from a flattened notebook chunk: the leading `rr` residue is
# dropped, fused statements are split, and typographic quotes are replaced
# with plain ones so the code parses.

# Stack the cleaned dog and cat tables.
dogs_cats <- rbind(dogs, cats)

# Joy plot of draw time (20 s or less) per word.
dogs_cats %>%
  filter(drawing_time_draw_seconds <= 20) %>%
  ggplot(aes(x = drawing_time_draw_seconds, y = word)) +
  geom_joy(scale = 4) +
  theme_joy() +
  scale_y_discrete(expand = c(0.01, 0)) +
  scale_x_continuous(expand = c(0, 0))

# Count of drawings per (word, recognized) pair. summarise() peels off the
# `recognized` grouping, so `freq` is the share within each word.
dogs_cats_rec_sum <- dogs_cats %>%
  group_by(word, recognized) %>%
  summarise(count = n()) %>%
  mutate(freq = count / sum(count))

# Recognized drawings only.
cats_rec <- cats %>% filter(recognized == 'True')
dogs_rec <- dogs %>% filter(recognized == 'True')

dogs_cats_rec <- rbind(dogs_rec, cats_rec)

# Pause time per word for the recognized drawings.
dogs_cats_rec %>%
  ggplot(aes(x = word, y = drawing_time_pause_seconds)) +
  geom_boxplot()
rr summary(dogs_rec$drawing_time_pause_seconds)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 1.845 3.154 3.418 4.705 133.600
rr summary(cats_rec$drawing_time_pause_seconds)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 2.561 3.699 4.118 5.291 261.400
rr
#dogs_cats <- rbind(cat_drawings_rec, dog_drawings_rec)
rr NA
# Reconstructed from a flattened notebook chunk (two statements had been fused
# onto one line with stray `rr` / `r` residue between them).

# Number of drawings that took longer than 20 s in total ...
dogs_cats %>% filter(drawing_time_seconds > 20) %>% count()

# ... and the same count as a fraction of all rows. `%>%` binds tighter than
# `/`, so the division applies to the result of count().
dogs_cats %>% filter(drawing_time_seconds > 20) %>% count() / nrow(dogs_cats)
rr summary(dogs_rec$drawing_time_seconds)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.107 8.097 10.460 10.830 13.200 143.900
rr summary(cats_rec$drawing_time_seconds)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.114 6.744 8.963 9.593 12.120 267.000
rr dogs_cats %>% filter(drawing_time_seconds > 20) %>% ggplot(aes(x = word, y = drawing_time_pause_seconds)) + geom_boxplot()
rr NA
rr pg
$data
$data[[1]]
$data[[2]]
$layout
<ggproto object: Class Layout>
facet: <ggproto object: Class FacetNull, Facet>
compute_layout: function
draw_back: function
draw_front: function
draw_labels: function
draw_panels: function
finish_data: function
init_scales: function
map: function
map_data: function
params: list
render_back: function
render_front: function
render_panels: function
setup_data: function
setup_params: function
shrink: TRUE
train: function
train_positions: function
train_scales: function
vars: function
super: <ggproto object: Class FacetNull, Facet>
finish_data: function
get_scales: function
map: function
map_position: function
panel_layout: data.frame
panel_ranges: list
panel_scales: list
render: function
render_labels: function
reset_scales: function
setup: function
train_position: function
train_ranges: function
xlabel: function
ylabel: function
super: <ggproto object: Class Layout>
$plot
# Reconstructed from a flattened notebook chunk: `rr` residue removed, fused
# statements split, typographic quotes replaced with plain quotes.

# Bin draw time into whole seconds and compute each bin's share of drawings.
dogs_rec <- dogs_rec %>% mutate(time_bin = floor(drawing_time_draw_seconds))
dog_bins <- dogs_rec %>%
  group_by(time_bin) %>%
  summarise(count = n()) %>%
  mutate(freq = count / sum(count))

cats_rec <- cats_rec %>% mutate(time_bin = floor(drawing_time_draw_seconds))
cat_bins <- cats_rec %>%
  group_by(time_bin) %>%
  summarise(count = n()) %>%
  mutate(freq = count / sum(count))

# Overlay the two distributions (dogs in blue, cats in red), x limited to 0-25.
dog_bins %>%
  ggplot() +
  geom_bar(data = dog_bins, mapping = aes(x = time_bin, y = freq),
           stat = 'identity', fill = 'blue', alpha = 0.6) +
  geom_bar(data = cat_bins, mapping = aes(x = time_bin, y = freq),
           stat = 'identity', fill = 'red', alpha = 0.6) +
  scale_x_continuous(limits = c(0, 25))
Test whether the different sample sizes (n) of the two groups affect the results.
# Reconstructed from a flattened notebook chunk (two assignments had been
# fused onto one line behind an `rr` residue).
# Draw equally sized random samples (50000 rows each) from both groups.
cats_rec_sample <- cats_rec %>% sample_n(50000)
dogs_rec_sample <- dogs_rec %>% sample_n(50000)
rr mean(cats_rec$drawing_time_seconds)
[1] 9.593239
rr mean(dogs_rec$drawing_time_seconds)
[1] 10.82946
rr mean(cats_rec$drawing_time_draw_seconds)
[1] 5.474867
rr mean(dogs_rec$drawing_time_draw_seconds)
[1] 7.411101
rr mean(cats_rec$stroke_count)
[1] 9.522221
rr mean(dogs_rec$stroke_count)
[1] 7.030852
rr dogs_rec %>% filter(drawing_time_draw_seconds < 30) %>% ggplot(aes(x = drawing_time_draw_seconds, y = stroke_count)) + geom_point(alpha = 1 / 100)
rr NA
rr cats_rec %>% ggplot(aes(x = drawing_time_draw_seconds, y = stroke_count)) + geom_point(alpha = 1 / 10)
rr NA
# Reconstructed from a flattened notebook chunk: `rr` residue removed,
# typographic quotes replaced, and the fused lm()/summary() calls split.

# Stroke count against pause time (under 20 s) for recognized cats, with a
# linear trend line.
cats_rec %>%
  filter(drawing_time_pause_seconds < 20) %>%
  ggplot(aes(x = drawing_time_pause_seconds, y = stroke_count)) +
  geom_point(alpha = 1 / 10) +
  geom_smooth(method = 'lm')

cats_rec_filtered <- cats_rec %>% filter(drawing_time_pause_seconds < 20)

# Regress pause time on stroke count over the same filtered rows.
fit <- lm(drawing_time_pause_seconds ~ stroke_count, data = cats_rec_filtered)
summary(fit)
Call:
lm(formula = drawing_time_pause_seconds ~ stroke_count, data = cats_rec_filtered)
Residuals:
Min 1Q Median 3Q Max
-18.5908 -1.0138 -0.2461 0.8020 13.8677
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.301497 0.014292 21.09 <2e-16 ***
stroke_count 0.400271 0.001411 283.67 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.563 on 102933 degrees of freedom
Multiple R-squared: 0.4388, Adjusted R-squared: 0.4387
F-statistic: 8.047e+04 on 1 and 102933 DF, p-value: < 2.2e-16
# Reconstructed from a flattened notebook chunk (`rr` residue removed, fused
# statements split).

# Recognized drawings per country, most frequent first.
countrycodes_dogs <- dogs_rec %>%
  group_by(countrycode) %>%
  summarise(country_count = n()) %>%
  arrange(-country_count)
countrycodes_cats <- cats_rec %>%
  group_by(countrycode) %>%
  summarise(country_count = n()) %>%
  arrange(-country_count)

# The 30 countries with the most recognized dog drawings.
top_countrycodes <- countrycodes_dogs %>% head(n = 30)

# NOTE(review): `dogs_rec_top_countries` is plotted below but never assigned
# in the visible notebook, while `top_countrycodes` is otherwise unused.
# Restricting dogs_rec to those countries is the presumed intent -- confirm.
dogs_rec_top_countries <- dogs_rec %>%
  filter(countrycode %in% top_countrycodes$countrycode)

# Draw-time histogram per country; the y value divides each bin count by its
# own panel's total, so every facet shows within-country shares.
dogs_rec_top_countries %>%
  filter(drawing_time_draw_seconds < 30) %>%
  ggplot(aes(x = drawing_time_draw_seconds)) +
  facet_wrap(~ countrycode) +
  geom_histogram(aes(y=(..count..)/tapply(..count..,..PANEL..,sum)[..PANEL..]), binwidth = 1)
rr dogs_rec_top_countries %>% filter(drawing_time_draw_seconds < 30) %>% ggplot(aes(x = drawing_time_draw_seconds)) + facet_wrap(~ countrycode) + geom_histogram(aes(y=(..count..)/tapply(..count..,..PANEL..,sum)[..PANEL..]), binwidth = 1)
rr NA
rr dogs_rec_top_countries %>% filter(drawing_time_pause_seconds < 15) %>% ggplot(aes(x = drawing_time_pause_seconds)) + facet_wrap(~ countrycode) + geom_histogram(aes(y=(..count..)/tapply(..count..,..PANEL..,sum)[..PANEL..]), binwidth = 1)
rr NA
read_names <- function (names) {
  # Load, stack and clean the stats files for a set of categories.
  #
  # names: a data frame with a `basename` column; each value <b> is read from
  #        './data/<b>.stats.csv'.
  # Returns the row-bound tables after prepare_data(), keeping only rows whose
  # `recognized` column equals 'True'.
  # Stops with an explicit error when `names` has no basenames. The previous
  # `1:nrow(names)` loop iterated over c(1, 0) in that case and tried to read
  # './data/NA.stats.csv'.
  data_path <- './data/'

  # Pull the column out as a plain vector so each path is built from a string
  # rather than from a one-cell data frame.
  basenames <- names[["basename"]]
  if (length(basenames) == 0) {
    stop("read_names(): `names` contains no basenames to read", call. = FALSE)
  }

  all_cates <- lapply(basenames, function (name_file) {
    read_csv(paste0(data_path, name_file, ".stats.csv"))
  })

  all_cates_df <- do.call("rbind", all_cates)
  all_cates_df_clean <- prepare_data(all_cates_df)
  all_cates_df_clean %>% filter(recognized == 'True')
}
# Cleaned, recognized-only drawings for every category flagged as a bug.
all_bugs_df <- read_names(bugs)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
Parsed with column specification:
cols(
key_id = col_double(),
recognized = col_character(),
word = col_character(),
stroke_count = col_integer(),
countrycode = col_character(),
drawing_time = col_integer(),
drawing_time_pause = col_integer(),
drawing_time_draw = col_integer(),
stroke_in_order = col_integer(),
drawing_time_min = col_integer(),
drawing_time_max = col_integer()
)
# Per-word summary of the bug drawings, ordered from the longest to the
# shortest mean draw time.
bugs_agg <- all_bugs_df %>%
  group_by(word) %>%
  summarise(
    count = n(),
    stroke_count_mean = mean(stroke_count),
    stroke_count_med = median(stroke_count),
    drawing_time_pause_sec_mean = mean(drawing_time_pause_seconds),
    drawing_time_pause_sec_med = median(drawing_time_pause_seconds),
    drawing_time_draw_sec_mean = mean(drawing_time_draw_seconds),
    drawing_time_draw_sec_med = median(drawing_time_draw_seconds)
  ) %>%
  arrange(desc(drawing_time_draw_sec_mean))

# Joy plot of draw time per bug; rows and fill shades follow the ordering of
# bugs_agg$word.
ggplot(
  all_bugs_df,
  aes(
    x = drawing_time_draw_seconds,
    y = factor(word, levels = bugs_agg$word),
    fill = factor(word, levels = bugs_agg$word)
  )
) +
  geom_joy(size = 0.55) +
  scale_x_continuous(limits = c(1, 20), expand = c(0.01, 0)) +
  scale_y_discrete(expand = c(0.01, 0)) +
  scale_fill_brewer(palette = "Greys", direction = -1, guide = FALSE) +
  theme_joy() +
  labs(title = 'Bugs', x = '', y = '')

ggsave('bugs.png', height = 12, width = 8)
# Metadata rows flagged as fruit, then their cleaned, recognized-only drawings.
fruits <- meta %>% filter(fruit == TRUE)
all_fruits <- read_names(fruits)
|========= | 11%
|========= | 12%
|========== | 12%
|========== | 13% 1 MB
|=========== | 14% 1 MB
|=========== | 14% 1 MB
|============ | 15% 1 MB
|============= | 16% 1 MB
|============= | 17% 1 MB
|============== | 17% 1 MB
|============== | 18% 1 MB
|=============== | 19% 1 MB
|=============== | 19% 1 MB
|================ | 20% 1 MB
|================= | 21% 1 MB
|================= | 22% 1 MB
|================== | 22% 1 MB
|================== | 23% 1 MB
|=================== | 24% 1 MB
|=================== | 24% 1 MB
|==================== | 25% 1 MB
|===================== | 26% 1 MB
|===================== | 27% 2 MB
|====================== | 27% 2 MB
|====================== | 28% 2 MB
|======================= | 29% 2 MB
|======================= | 29% 2 MB
|======================== | 30% 2 MB
|========================= | 31% 2 MB
|========================= | 31% 2 MB
|========================== | 32% 2 MB
|========================== | 33% 2 MB
|=========================== | 34% 2 MB
|=========================== | 34% 2 MB
|============================ | 35% 2 MB
|============================= | 36% 2 MB
|============================= | 36% 2 MB
|============================== | 37% 2 MB
|============================== | 38% 2 MB
|=============================== | 39% 2 MB
|=============================== | 39% 3 MB
|================================ | 40% 3 MB
|================================ | 41% 3 MB
|================================= | 41% 3 MB
|================================== | 42% 3 MB
|================================== | 43% 3 MB
|=================================== | 44% 3 MB
|=================================== | 44% 3 MB
|==================================== | 45% 3 MB
|==================================== | 46% 3 MB
|===================================== | 46% 3 MB
|====================================== | 47% 3 MB
|====================================== | 48% 3 MB
|======================================= | 49% 3 MB
|======================================= | 49% 3 MB
|======================================== | 50% 3 MB
|======================================== | 51% 3 MB
|========================================= | 51% 3 MB
|========================================== | 52% 3 MB
|========================================== | 53% 4 MB
|=========================================== | 54% 4 MB
|=========================================== | 54% 4 MB
|============================================ | 55% 4 MB
|============================================ | 56% 4 MB
|============================================= | 56% 4 MB
|============================================== | 57% 4 MB
|============================================== | 58% 4 MB
|=============================================== | 58% 4 MB
|=============================================== | 59% 4 MB
|================================================ | 60% 4 MB
|================================================ | 61% 4 MB
|================================================= | 61% 4 MB
|================================================== | 62% 4 MB
|================================================== | 63% 4 MB
|=================================================== | 63% 4 MB
|=================================================== | 64% 4 MB
|==================================================== | 65% 4 MB
|==================================================== | 66% 4 MB
|===================================================== | 66% 5 MB
|====================================================== | 67% 5 MB
|====================================================== | 68% 5 MB
|======================================================= | 68% 5 MB
|======================================================= | 69% 5 MB
|======================================================== | 70% 5 MB
|======================================================== | 71% 5 MB
|========================================================= | 71% 5 MB
|========================================================== | 72% 5 MB
|========================================================== | 73% 5 MB
|=========================================================== | 73% 5 MB
|=========================================================== | 74% 5 MB
|============================================================ | 75% 5 MB
|============================================================ | 76% 5 MB
|============================================================= | 76% 5 MB
|============================================================= | 77% 5 MB
|============================================================== | 78% 5 MB
|=============================================================== | 78% 5 MB
|=============================================================== | 79% 6 MB
|================================================================ | 80% 6 MB
|================================================================ | 81% 6 MB
|================================================================= | 81% 6 MB
|================================================================= | 82% 6 MB
|================================================================== | 83% 6 MB
|=================================================================== | 83% 6 MB
|=================================================================== | 84% 6 MB
|==================================================================== | 85% 6 MB
|==================================================================== | 86% 6 MB
|===================================================================== | 86% 6 MB
|===================================================================== | 87% 6 MB
|====================================================================== | 88% 6 MB
|======================================================================= | 88% 6 MB
|======================================================================= | 89% 6 MB
|======================================================================== | 90% 6 MB
|======================================================================== | 90% 6 MB
|========================================================================= | 91% 6 MB
|========================================================================= | 92% 6 MB
|========================================================================== | 93% 7 MB
|=========================================================================== | 93% 7 MB
|=========================================================================== | 94% 7 MB
|============================================================================ | 95% 7 MB
|============================================================================ | 95% 7 MB
|============================================================================= | 96% 7 MB
|============================================================================= | 97% 7 MB
|============================================================================== | 98% 7 MB
|===============================================================================| 98% 7 MB
|===============================================================================| 99% 7 MB
|================================================================================| 100% 7 MB
# Per-word summary of the fruit drawings, ordered from the longest to the
# shortest mean draw time.
fruits_agg <- all_fruits %>%
  group_by(word) %>%
  summarise(
    count = n(),
    stroke_count_mean = mean(stroke_count),
    stroke_count_med = median(stroke_count),
    drawing_time_pause_sec_mean = mean(drawing_time_pause_seconds),
    drawing_time_pause_sec_med = median(drawing_time_pause_seconds),
    drawing_time_draw_sec_mean = mean(drawing_time_draw_seconds),
    drawing_time_draw_sec_med = median(drawing_time_draw_seconds)
  ) %>%
  arrange(desc(drawing_time_draw_sec_mean))

# Joy plot of draw time per fruit; rows and fill shades follow the ordering
# of fruits_agg$word.
ggplot(
  all_fruits,
  aes(
    x = drawing_time_draw_seconds,
    y = factor(word, levels = fruits_agg$word),
    fill = factor(word, levels = fruits_agg$word)
  )
) +
  geom_joy(size = 0.55) +
  scale_x_continuous(limits = c(1, 20), expand = c(0.01, 0)) +
  scale_y_discrete(expand = c(0.01, 0)) +
  scale_fill_brewer(palette = "Oranges", direction = -1, guide = FALSE) +
  theme_joy() +
  labs(title = 'Fruits', x = '', y = '')

ggsave('fruits.png', height = 12, width = 8)
# Share of banana drawings per 0.1 s bin of total drawing time, x axis 0-20.
ggplot(
  data = filter(all_fruits, word == "banana"),
  mapping = aes(x = drawing_time_seconds, y = (..count..)/sum(..count..))
) +
  geom_histogram(fill = "#FFD600", binwidth = 0.1) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(limits = c(0, 20)) +
  labs(title = "Banana Total Drawing Time", x = "", y = "") +
  theme_joy()

# No plot argument is given, so ggsave() writes the most recent plot.
ggsave("banana_total.png", height = 5, width = 6)
# Banana total drawing time on the raw (un-divided) scale, 100-unit bins.
ggplot(filter(all_fruits, word == "banana"), aes(x = drawing_time)) +
  geom_histogram(binwidth = 100) +
  scale_x_continuous(limits = c(0, 20000))

# Banana stroke counts, one bin per stroke.
ggplot(filter(all_fruits, word == "banana"), aes(x = stroke_count)) +
  geom_histogram(binwidth = 1)
#scale_x_continuous(limits = c(0, 20))

# Raw total drawing time for every fruit drawn with fewer than 5 strokes.
ggplot(filter(all_fruits, stroke_count < 5), aes(x = drawing_time)) +
  geom_histogram(binwidth = 100) +
  scale_x_continuous(limits = c(0, 20000))
# Share of drawings per 1 s bin of total drawing time across all categories,
# x axis limited to 0-10, saved as all_total_1.png.
# NOTE(review): in file order `all_cates` is only assigned further down
# (all_cates <- read_names(meta)) and is later rebound to a plain list --
# confirm which object this chunk is meant to plot.
all_cates %>% #filter(stroke_count < 5) %>%
ggplot(aes(x = drawing_time_seconds, y = (..count..)/sum(..count..))) +
geom_histogram(binwidth = 1) +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(limits = c(0, 10)) +
theme_joy() +
labs(title = "All Categories Total Drawing Time (binwidth = 1)", y = '', x = '')
ggsave('all_total_1.png', width = 6, height = 5)
# Draw time of recognized bananas in 0.1 s bins, x axis limited to 0.5-8.
ggplot(filter(bananas, recognized == 'True'), aes(x = drawing_time_draw_seconds)) +
  geom_histogram(binwidth = 0.1) +
  scale_x_continuous(limits = c(0.5, 8))

# Bananas (solid yellow) overlaid with all fruit drawn in fewer than 5 strokes
# (translucent black). Both layers inherit the same mapping, and each layer's
# counts are normalised by that layer's own total.
ggplot(mapping = aes(x = drawing_time_seconds, y = (..count..)/sum(..count..))) +
  geom_histogram(data = bananas, binwidth = 0.1, fill = "#FFD600", alpha = 1.0) +
  geom_histogram(data = filter(all_fruits, stroke_count < 5), binwidth = 0.1, fill = 'black', alpha = 0.6) +
  #geom_histogram(data = dogs, aes(x = drawing_time_seconds, y = (..count..)/sum(..count..)), alpha = 0.6, fill = 'blue', binwidth = 0.1) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(limits = c(0, 10), breaks = seq(0,10,0.5)) +
  theme_light() +
  labs(title = "Banana vs All Fruit Drawing Time (binwidth = 0.1)", x = "Drawing Time (secs)", y = "")

ggsave("bananas_vs_fruit.png", height = 5, width = 6)
mean(all_fruits$drawing_time_seconds)
[1] 6.211525
mean(dogs$drawing_time_seconds)
[1] 11.08655
getmode <- function(v) {
  # Most frequent value of `v`; on a tie, the value that appears first wins.
  # Distinct values, in order of first appearance.
  candidates <- unique(v)
  # How many times each distinct value occurs in `v`.
  tallies <- tabulate(match(v, candidates))
  winner <- which.max(tallies)
  candidates[winner]
}
getmode(bananas$drawing_time_seconds)
[1] 2.615
# NOTE(review): summary() is called with no argument, which fails because its
# `object` parameter has no default. The intended target is not visible here.
summary()
# Stroke-count distribution of the bananas_two_sec subset.
ggplot(bananas_two_sec, aes(x = factor(stroke_count))) +
  geom_bar()

# Banana total drawing time (0.5 s bins, 1-8 s), one panel per stroke count
# below 4.
ggplot(filter(bananas, stroke_count < 4), aes(x = drawing_time_seconds)) +
  geom_histogram(binwidth = 0.5) +
  facet_wrap(~ stroke_count) +
  scale_x_continuous(limits = c(1, 8))

# Same view for pears: stroke counts below 6, 0-10 s.
ggplot(filter(pears, stroke_count < 6), aes(x = drawing_time_seconds)) +
  geom_histogram(binwidth = 0.5) +
  facet_wrap(~ stroke_count) +
  scale_x_continuous(limits = c(0, 10))

# Recognized dogs with fewer than 4 strokes and under 20 s total, in 0.1 s
# bins over 4-11 s, one panel per stroke count.
ggplot(
  filter(dogs, stroke_count < 4, drawing_time_seconds < 20, recognized == 'True'),
  aes(x = drawing_time_seconds)
) +
  geom_histogram(binwidth = 0.1) +
  facet_wrap(~ stroke_count) +
  scale_x_continuous(limits = c(4, 11))
# Split total drawing time into its whole-second part (`sec`) and the
# fractional remainder within that second (`time_in_sec`).
dogs_m <- mutate(
  dogs,
  sec = floor(drawing_time_seconds),
  time_in_sec = drawing_time_seconds - sec
)

# Distribution of the fractional part (0.01 s bins) for low-stroke dogs, one
# panel per whole second from 4 to 15.
ggplot(filter(dogs_m, stroke_count < 4, sec %in% 4:15), aes(x = time_in_sec)) +
  geom_histogram(binwidth = 0.01) +
  #scale_x_continuous(limits = c(1, 20)) +
  facet_wrap(~ sec)
# Area chart of total drawing time for dogs drawn with fewer than 4 strokes in
# under 20 s.
# geom_area() needs a y aesthetic; with only x mapped it failed with
# "object 'y' not found". Binning x and mapping y to the bin count gives it
# one. binwidth = 0.1 matches the other dog histograms in this notebook.
dogs %>% filter(stroke_count < 4) %>% filter(drawing_time_seconds < 20) %>%
  ggplot(aes(x = drawing_time_seconds)) +
  geom_area(aes(y = ..count..), stat = "bin", binwidth = 0.1)
Error in eval(expr, envir, enclos) : object 'y' not found
# Count bananas per (round_time, stroke_count). summarise() peels off the
# stroke_count grouping, so `freq` is the share of each stroke count within
# one round_time value.
bananas_by_time <- bananas %>%
  group_by(round_time, stroke_count) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

# Stacked bars of those shares for round_time < 20 and stroke_count < 10.
ggplot(
  filter(bananas_by_time, round_time < 20, stroke_count < 10),
  aes(x = round_time, y = freq, fill = factor(stroke_count))
) +
  geom_bar(stat = 'identity')
# Metadata rows flagged as animals, then their cleaned, recognized-only
# drawings.
animals_meta <- filter(meta, animal == TRUE)
all_animals <- read_names(animals_meta)

library(RColorBrewer)

# Per-word summary, ordered from the longest to the shortest mean draw time.
animals_agg <- all_animals %>%
  group_by(word) %>%
  summarise(
    count = n(),
    stroke_count_mean = mean(stroke_count),
    stroke_count_med = median(stroke_count),
    drawing_time_pause_sec_mean = mean(drawing_time_pause_seconds),
    drawing_time_pause_sec_med = median(drawing_time_pause_seconds),
    drawing_time_draw_sec_mean = mean(drawing_time_draw_seconds),
    drawing_time_draw_sec_med = median(drawing_time_draw_seconds)
  ) %>%
  arrange(desc(drawing_time_draw_sec_mean))

# Joy plot of draw time per animal, rows ordered by animals_agg$word. The fill
# cycles through a 6-colour GnBu palette repeated 9 times (54 values in all).
ggplot(
  all_animals,
  aes(
    x = drawing_time_draw_seconds,
    y = factor(word, levels = animals_agg$word),
    fill = factor(word, levels = animals_agg$word)
  )
) +
  geom_joy(size = 0.55) +
  scale_x_continuous(limits = c(1, 18), expand = c(0.01, 0)) +
  scale_y_discrete(expand = c(0.01, 0)) +
  #scale_fill_brewer(guide = FALSE, palette = "YlGnBu", direction = -1) +
  scale_fill_manual(values = rep(brewer.pal(6, "GnBu"), times = 9), guide = FALSE) +
  theme_joy() +
  labs(title = 'Animals', x = '', y = '')

ggsave('animals.png', height = 12, width = 8)
# Cleaned, recognized-only drawings for every category listed in `meta`.
all_cates <- read_names(meta)

# Per-word aggregates (me = mean, md = median), ordered from the longest to
# the shortest mean draw time.
all_agg <- all_cates %>%
  group_by(word) %>%
  summarise(
    n = n(),
    scount_me = mean(stroke_count),
    scount_md = median(stroke_count),
    d_pause_me = mean(drawing_time_pause_seconds),
    d_pause_md = median(drawing_time_pause_seconds),
    d_draw_me = mean(drawing_time_draw_seconds),
    d_draw_md = median(drawing_time_draw_seconds),
    d_total_md = median(drawing_time_seconds),
    d_total_me = mean(drawing_time_seconds)
  ) %>%
  arrange(desc(d_draw_me))

# Same aggregates after an explicit recognized == 'True' filter. read_names()
# already applies that filter before returning.
all_agg_rec <- filter(all_cates, recognized == 'True') %>%
  group_by(word) %>%
  summarise(
    n = n(),
    scount_me = mean(stroke_count),
    scount_md = median(stroke_count),
    d_pause_me = mean(drawing_time_pause_seconds),
    d_pause_md = median(drawing_time_pause_seconds),
    d_draw_me = mean(drawing_time_draw_seconds),
    d_draw_md = median(drawing_time_draw_seconds),
    d_total_md = median(drawing_time_seconds),
    d_total_me = mean(drawing_time_seconds)
  ) %>%
  arrange(desc(d_draw_me))
# Mean against median draw time, one point per word.
# `all_agg` stores these as d_draw_me / d_draw_md; the earlier
# drawing_time_draw_sec_mean / _med names do not exist in that table.
all_agg %>%
  ggplot(aes(x = d_draw_me, y = d_draw_md)) +
  geom_point()
library(ggbeeswarm)
# Beeswarm of the per-word median draw time on a single row.
# `all_agg` names that column d_draw_md; drawing_time_draw_sec_med is not a
# column of that table.
all_agg %>%
  ggplot(aes(x = d_draw_md, y = '')) +
  geom_beeswarm(groupOnX = FALSE, cex = 2.8, size = 2) +
  labs(x = '', y = '')
#geom_quasirandom(groupOnX = FALSE, varwidth = TRUE)
# Persist the per-word aggregates.
write_csv(all_agg, 'all_aggregates.csv')
# List every file under ./data/ whose name contains ".stats.csv".
data_path <- './data/'
stat_files <- list.files(path = data_path, pattern = '\\.stats\\.csv')
length(stat_files)
# Preallocate one slot per stats file. This rebinds `all_cates`, which until
# here held the read_names(meta) result, to a plain list.
all_cates <- vector("list",length(stat_files))
length(all_cates)
# Read and clean every stats file into its preallocated slot of `all_cates`.
# seq_along() makes the loop a no-op when no files were found;
# 1:length(stat_files) would iterate over c(1, 0) and try to read './data/NA'.
for (i in seq_along(stat_files)) {
  stat_file <- stat_files[i]
  full_path <- paste0(data_path, stat_file)
  # Echo the path so progress is visible while the files load.
  print(full_path)
  cate_org <- read_csv(full_path)
  cate <- prepare_data(cate_org)
  all_cates[[i]] <- cate
}
# Stack the per-file tables into one data frame.
all_cates_df <- do.call("rbind", all_cates)

# prepare_data() is applied once more to the combined table (each piece was
# already cleaned while loading), then only recognized drawings are kept.
all_cates_df_clean <- prepare_data(all_cates_df)
all_cates_df_clean_rec <- filter(all_cates_df_clean, recognized == 'True')

# Per-word summary, ordered from the longest to the shortest mean draw time.
all_cates_mean <- all_cates_df_clean_rec %>%
  group_by(word) %>%
  summarise(
    count = n(),
    stroke_count_mean = mean(stroke_count),
    stroke_count_med = median(stroke_count),
    drawing_time_pause_sec_mean = mean(drawing_time_pause_seconds),
    drawing_time_pause_sec_med = median(drawing_time_pause_seconds),
    drawing_time_draw_sec_mean = mean(drawing_time_draw_seconds),
    drawing_time_draw_sec_med = median(drawing_time_draw_seconds)
  ) %>%
  arrange(desc(drawing_time_draw_sec_mean))
# Turn `word` into a factor with alphabetically sorted levels.
# Levels must be unique: passing the whole sorted column (one entry per
# drawing) makes factor() fail with "factor level [2] is duplicated".
all_cates_df_clean_rec$word <- factor(
  all_cates_df_clean_rec$word,
  levels = sort(unique(all_cates_df_clean_rec$word))
)
# Joy plot of draw time (20 s or less) for every word.
ggplot(
  filter(all_cates_df_clean_rec, drawing_time_draw_seconds <= 20),
  aes(x = drawing_time_draw_seconds, y = word)
) +
  geom_joy(scale = 4) +
  theme_joy()

# Number of dog rows in the combined table.
count(filter(all_cates_df, word == 'dog'))

# Draw-time histogram (1 s bins) for recognized dogs drawn in 20 s or less.
ggplot(
  filter(all_cates_df_clean_rec, word == 'dog', drawing_time_draw_seconds <= 20),
  aes(x = drawing_time_draw_seconds)
) +
  geom_histogram(binwidth = 1) +
  labs(title = "Histogram of Drawing Time")